********************************************************************************
**	PURPOSE: Usage of other SLCCU products, create T7
**							
**	INPUTS: Long with outcomes.dta
**	
**	OUTPUTS: SLCCU products main.xls (written by outreg2 to $outputtables)
**
**  CREATED/EDITED BY: Kayla Wilding, Leah Kim, Hasan Ahamed
**
**	DATE CREATED: 7/2017
**
**	DATE LAST EDITED: 2/28/2023
********************************************************************************
clear all

*1. Output settings used by the outreg2 calls further down
	* decs: number of decimals reported in the exported table and in the formatted means / p-values
	local decs 3
	* filename: base name (no extension) of the exported table
	local filename "SLCCU products main"

*2. Load the analysis dataset from the $adta directory
	use "$adta/Long with outcomes.dta", clear

*3. Regression specifications and outcome variables

	* loanspec1 - average treatment effect spec: Post and CBL Group*Post
	local loanspec1 post 1.enc#1.post

	* Earlier HTE spec by baseline loan status, kept for reference (not used):
	* local loanspec2 post 1.ltca#1.post 1.ltca#1.enc#1.post 1.htca#1.enc#1.post

	* loanspec2 - heterogeneous treatment effect spec, built in two steps:
	*   (i)  post interacted with each of the ltca, mtca and htca indicators
	*   (ii) enc*post interacted with each of the same three indicators
	local loanspec2 1.ltca#1.post 1.mtca#1.post 1.htca#1.post
	local loanspec2 `loanspec2' 1.ltca#1.enc#1.post 1.mtca#1.enc#1.post 1.htca#1.enc#1.post

	* Dependent variables looped over in the analysis section (described in the note below)
	local outcomes slccumem binnoncblslc savingsbal_meh savcheck_meh
		/*Outcome variables 
			1. binnoncblslc 
				a. desc: has a loan with slccu that is not a cbl loan; 
				b. Where is it created? script 1b
				c. Input variables and source for those: numloans 
				(which is raw from admin data) and 
				cm_open (also raw from admin) 
				d. Data source classification: admin variable
				e. Notes: 
			2. slccumem 
				a. is an indicator for if the person is still 
					a member at slccu; 
				b. it is created in script 1b and 
				c. comes from admin_pulldate(raw from admin) 
				d. so its an admin var; 
			3. savingsbal_meh
				a. Desc: Month end balance of savings account in
				 hundreds of dollars
				b.
				c. comes from raw admin data variable "savingsbal" 
				d. admin var 
			4. savcheck_meh
				a. Desc: Month end balance of savings + checkings
				 account in hundreds of dollars
				b.
				c.
				d. admin
			*/

*4. Cross-sell analysis
	* For each outcome this loop (4a) estimates the average treatment effect and (4b) estimates
	* heterogeneous treatment effects across the ltca, mtca and htca groups, and writes both
	* regressions to the same outreg2 table.
	local loopcount 1	//counter: the first outreg2 call (loopcount == 1) replaces the output file, every later call appends
	foreach var in `outcomes' {
		*4a. Average Treatment Effects
			* areg with absorb(surveyid) gives person fixed effects; standard errors are clustered on surveyid.
			* The sample is restricted with flag_randomized == 1 only. Earlier notes in this script described
			* an additional !mi(ne_ln) restriction, but it is not part of the current if-condition.
			cap drop __*
				areg `var' `loanspec1' if flag_randomized == 1, absorb(surveyid) vce(cluster surveyid)
					* "`var'" is the outcome variable, 
					* `"loanspec1"' calls the independent vars for the ATE, 
					* "flag_randomized" restricts to people assigned to treatment or control (1531 per the original author's note)
					 
					local obs = e(N)	//store the observations to report manually through addtext (see the outreg2 notes below)
					local inds = e(df_a) + 1 	//number of individuals: obs per person varies, so it is reported separately. NOTE(review): assumes e(df_a) equals the number of absorbed surveyid groups minus one - confirm
				
				sum `var' if e(sample) == 1 & index == 1 & flag_extragroup == 1 //mean of the dependent variable at baseline (index == 1) for the extra step group (flag_extragroup == 1), within the estimation sample
					local mu_lhs = string(r(mean), "%14.`decs'f") 
				#d ; 
				outreg2 using "$outputtables/`filename'.xls", `=cond(`loopcount' == 1,"replace","append")'
					keep(1.enc#1.post) sortvar(1.enc#1.post) excel label nocons nor2 noobs nonote noaster dec(`decs') 
					addtext(Obs, "`obs'", Individuals, "`inds'", Mean Dependent Variable in Extra Step Group at Baseline, `mu_lhs') slow(8000) ; 
				#d cr
						sleep 2000 
						* replace the output on the first pass, append on later passes
						* keep only the treatment effect variable and order it first
						* output to excel, use var labels rather than names, suppress the constant, obs, r2, note, stars, and slow the output to give excel time
						* define how many decimals we want in the output - local defined at top of script
						* add the observations, individuals, and baseline mean through addtext
						* The built-in obs row is suppressed and added manually to control the order of the rows: Stata puts the mean and obs above the addtext rows, but the p-values (added in 4b) should sit above the obs and means.

		*4b. Heterogeneous Treatment Effects across the ltca, mtca and htca groups
			* Same estimator and sample restriction as 4a (person fixed effects, flag_randomized == 1),
			* using the three-group spec in loanspec2. No !mi(ne_ln) restriction is applied here either.
			
				areg `var' `loanspec2' if flag_randomized == 1, absorb(surveyid) vce(cluster surveyid)
					* "`var'" is the outcome variable, 
					* `"loanspec2"' calls the independent vars for the HTE spec 
					* "flag_randomized" restricts to people assigned to treatment or control (1531 per the original author's note)
				
					local obs = e(N) //store the observations to report manually through addtext
					local inds = e(df_a) + 1 //number of individuals, same construction as in 4a

				sum `var' if e(sample) == 1 & index == 1 & flag_extragroup == 1 //mean of the dependent variable at baseline (index == 1) for the extra step group (flag_extragroup == 1), within the estimation sample
					local mu_lhs = string(r(mean), "%14.`decs'f") 

				test 1.ltca#1.enc#1.post = 1.mtca#1.enc#1.post // Wald test that the treatment effect is equal in the ltca and mtca groups
					local pvalue1_2 = string(r(p), "%14.`decs'f")
					
				test 1.mtca#1.enc#1.post = 1.htca#1.enc#1.post // Wald test that the treatment effect is equal in the mtca and htca groups
					local pvalue2_3 = string(r(p), "%14.`decs'f")
				
				test 1.ltca#1.enc#1.post = 1.htca#1.enc#1.post // Wald test that the treatment effect is equal in the ltca and htca groups
					local pvalue1_3 = string(r(p), "%14.`decs'f")


				#d ;
				outreg2 using "$outputtables/`filename'.xls", 
					append keep(1.ltca#1.enc#1.post 1.mtca#1.enc#1.post 1.htca#1.enc#1.post) 
					sortvar(1.enc#1.post 1.ltca#1.enc#1.post 1.mtca#1.enc#1.post 1.htca#1.enc#1.post) 
					excel label nocons noobs nor2 nonote noaster dec(`decs') 
					addtext(P-value of 1.ltca#1.enc#1.post = 1.mtca#1.enc#1.post, `pvalue1_2',  
							P-value of 1.mtca#1.enc#1.post = 1.htca#1.enc#1.post, `pvalue2_3',  
							P-value of 1.ltca#1.enc#1.post = 1.htca#1.enc#1.post, `pvalue1_3',  
							Obs, `obs', Individuals, `inds', Mean Dependent Variable in Extra Step Group at Baseline, `mu_lhs') slow(8000) ;
				#d cr 
						sleep 2000 
						* append these regressions to the average effects since they are going in the same file
						* keep only the three group-specific treatment effect terms
						* sortvar lists the ATE term first, then the ltca, mtca and htca terms, so that all three group rows are explicitly ordered (the mtca term was previously missing from the list)
						* output to excel, use var labels rather than names, suppress the constant, obs, r2, note, stars, and slow the output to give excel time
						* define how many decimals we want in the output - local defined at top of script
						* add the pvalues of the coefficient equality tests, observations, individuals, and means 
						* The built-in obs row is suppressed and added manually to control the order of the rows: Stata puts the mean and obs above the addtext rows, but the p-values should sit above the obs and means.
			
				*Put an empty column in between each variable 	
					* NOTE(review): this call targets the .txt file while the calls above target .xls - presumably outreg2's working copy of the same table; confirm this is intentional
					outreg2 using "$outputtables/`filename'.txt", append label skip

			local ++loopcount	//increase the loopcount counter 
	}

**EOF**
